### File: 2_image_annotation_diffs
### Purpose: Replicate Figures 1(A), 2, and 3
### Created: 12/2/2024


#######################
## required packages
#######################
## library() stops with an error right here if tidyverse is not installed;
## require() would only warn and return FALSE, so the script would carry on
## and fail later with an unrelated "could not find function" error
library(tidyverse)


#######################
### paths
#######################
## the current working directory is taken as the project root
main <- getwd()

## input folder (trailing slash kept: file names are appended directly)
data_dir <- paste0(main, "/data/")

## output folder for the figures (trailing slash kept, as above)
results_dir <- paste0(main, "/results/")

## make sure the output folder exists so the ggsave() calls further down
## do not fail on a fresh checkout; this is a no-op if it is already there
dir.create(results_dir, showWarnings = FALSE, recursive = TRUE)

#######################
### Read in the data and relevel
#######################

## load the annotation data; image and democrat are forced to character,
## every other column type is left for readr to guess
slim_image_labels <- read_csv(
  str_c(data_dir, "slim_image_labels.csv"),
  col_types = cols(
    image = col_character(),
    democrat = col_character()
  )
)

## move the listed hashtags to the front of the factor levels, in this
## order, so that hashtags on similar issues are together
slim_image_labels <- mutate(
  slim_image_labels,
  hashtag = fct_relevel(
    hashtag,
    "familiesbelongtogether",
    "cleandreamactnow",
    "abolishice",
    "nomuslimbanever",
    "womenswave",
    "uniteforjustice",
    "stopkavanaugh",
    "riseforclimate",
    "write4rights",
    "sayhername",
    "endgunviolence"
  )
)
  

#######################
#### Figure 1(A) What % of images had "protest"?, with gender split
############################

## reshaping data: one row per hashtag x annotator gender, holding the
## percent of images with / without protest and the spread of each
image_protest_gender <- slim_image_labels %>%
  ## only the columns needed for the protest rates
  dplyr::select(hashtag, image, protest_dum, female) %>%
  ## collapse annotators of the same gender on the same image:
  ## a total of 1 or more means at least one of them saw protest
  group_by(hashtag, image, female) %>%
  ## .groups = "drop" returns an ungrouped tibble (and silences the
  ## regrouping message) instead of leaving it grouped by hashtag/image
  summarise(total_protest_score = sum(protest_dum), .groups = "drop") %>%
  ## sum() is NA as soon as one contributing protest_dum is NA;
  ## those image/gender cells are dropped
  filter(!is.na(total_protest_score)) %>%
  mutate(null_protest = if_else(total_protest_score == 0, 1, 0),
         has_protest = if_else(total_protest_score >= 1, 1, 0)) %>%
  ### now compare rates of seeing protest by gender and hashtag
  group_by(hashtag, female) %>%
  ## convert into percent images with protest; sem is the standard error
  ## of that percentage (sd over the square root of the number of images)
  summarise(pct_no_protest = 100 * mean(null_protest),
            sd_no_protest = 100 * sd(null_protest),
            pct_with_protest = 100 * mean(has_protest),
            sd_with_protest = 100 * sd(has_protest),
            sem = 100 * sd(has_protest) / sqrt(n()),
            .groups = "drop")


### plot design for the gender figure
## both vectors are keyed by the value of "female" (as a string)
## colors: female ("1") is purple, "0" is orange
cols_gen <- setNames(c("purple", "orange"), c("1", "0"))
## linetypes: "1" is dashed, "0" is solid
lines_gen <- setNames(c("dashed", "solid"), c("1", "0"))

#### Figure 1(A): percent of images with protest per hashtag,
#### one point and error bar per annotator gender -- for intro figure
ggplot(image_protest_gender,
       aes(x = hashtag, y = pct_with_protest, color = factor(female))) +
  geom_point(position = position_dodge(width = 0.4), size = 3) +
  ## error bars span +/- 1.96 standard errors; color is inherited from the
  ## plot-level mapping, linetype additionally distinguishes the genders
  geom_errorbar(aes(ymin = pct_with_protest - (1.96 * sem),
                    ymax = pct_with_protest + (1.96 * sem),
                    linetype = factor(female)),
                position = position_dodge(width = 0.4),
                linewidth = 0.5,
                width = 0.2) +
  scale_color_manual(values = cols_gen) +
  scale_linetype_manual(values = lines_gen) +
  ## hashtags on the vertical axis
  coord_flip() +
  labs(x = "Hashtag",
       y = "Percent images that had protest, \nby annotator gender") +
  theme_classic() +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)),
        legend.position = "none")

## save the resulting figure (ggsave() writes the most recent plot)
## if you want a PDF
#ggsave(str_c(results_dir, "1A_gender_pct_images_protest.pdf"), 
#       width = 20, height = 20, units = "cm") 

## TIFF for publication
ggsave(filename = str_c(results_dir, "1A_gender_pct_images_protest.tiff"),
       device = "tiff",
       width = 20, height = 20, units = "cm",
       dpi = 300)

#######################
#### Figure 2A: Differences in Image Annotation by Partisanship
############################

### Figure 2A plots a difference in means and its CI from one t-test
### per emotion variable
## keep the image id, the partisanship indicator and the block of emotion
## columns, then give the emotions readable names for the plot
img_emote_ttest <- rename(
  select(slim_image_labels, image, democrat, Q3_emot_scared:Q3_emot_disgust),
  Scared = Q3_emot_scared,
  Anger = Q3_emot_angry,
  Fear = Q3_emot_afraid,
  Worry = Q3_emot_worried,
  Sadness = Q3_emot_sad,
  Hate = Q3_emot_hateful,
  Resentment = Q3_emot_resent,
  Bitterness = Q3_emot_bitter,
  Disgust = Q3_emot_disgust,
  Enthusiasm = Q3_emot_enthus,
  Hope = Q3_emot_hopeful,
  Pride = Q3_emot_proud
)

## the emotion variable names to test, one t-test each
emote_vars <- c("Anger", "Enthusiasm", "Fear", "Hate",
                "Hope", "Worry", "Resentment", "Pride",
                "Sadness", "Bitterness", "Disgust",
                "Scared")

########
## for each emotion, run a t-test (t.test() default, i.e. Welch) of the
## rating by the democrat indicator and extract the difference in means
## and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_emote_output <- lapply(emote_vars, function(emote_var) {
  rating <- img_emote_ttest[[emote_var]]
  party <- img_emote_ttest$democrat

  ttest_fit <- t.test(rating ~ party)

  tibble(varname = emote_var,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Republican minus Democrat per the axis label,
         ## assuming democrat is coded "0"/"1" -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(varname = fct_reorder(varname, diff))
  
#### plot it: one point (difference in means) and CI bar per emotion,
#### with a dotted reference line at zero difference
ggplot(ttest_emote_output) +
  geom_point(aes(x = varname, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = varname, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## emotions on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference between Republican \n and Democratic Responses") +
  xlab("Image Variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF  
#ggsave(str_c(results_dir, 
#             "2A_emotion_partisan_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "2A_emotion_partisan_diffs.tiff"),
       width = 20, height = 25.5, units = "cm",
       device = "tiff", dpi = 300)

#######################
#### Figure 2B: Differences in Image Annotation by Partisanship
############################

## As with Figure 2A, t-test loops
## here by hashtag only on the protest variable

### columns needed for the protest t-tests by partisanship
image_protest_ttest <- select(slim_image_labels,
                              hashtag, image, protest_dum, democrat)

## the distinct hashtags as plain strings, in order of first appearance
hashtags <- unique(as.character(image_protest_ttest$hashtag))

## for each hashtag, run a t-test (t.test() default, i.e. Welch) of the
## protest indicator by the democrat indicator and extract the difference
## in means and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_protest_partisan <- lapply(hashtags, function(current_tag) {
  tag_rows <- filter(image_protest_ttest, hashtag == current_tag)
  saw_protest <- tag_rows$protest_dum
  party <- tag_rows$democrat

  ttest_fit <- t.test(saw_protest ~ party)

  tibble(hashtag = current_tag,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Republican minus Democrat per the axis label,
         ## assuming democrat is coded "0"/"1" -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(hashtag = fct_reorder(hashtag, diff))

#### plot it: one point (difference in means) and CI bar per hashtag,
#### with a dotted reference line at zero difference
ggplot(ttest_protest_partisan) +
  geom_point(aes(x = hashtag, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = hashtag, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## hashtags on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference in Republican-Democrat \nProportion of Images with Protest") +
  xlab("Hashtag") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF
#ggsave(str_c(results_dir, 
#             "2B_hashtag_protest_partisan_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "2B_hashtag_protest_partisan_diffs.tiff"),
       width = 25, height = 20, units = "cm",
       device = "tiff", dpi = 300)

#######################
#### Figure 2C: Differences in Image Annotation by Partisanship
############################

## As with prior figs, t-test loops
## here just for disgust by hashtag and partisanship

### columns needed for the disgust t-tests by partisanship;
### the rating is renamed to Disgust while selecting
image_disgust_ttest <- select(slim_image_labels,
                              hashtag, image,
                              Disgust = Q3_emot_disgust,
                              democrat)

## for each hashtag, run a t-test (t.test() default, i.e. Welch) of the
## disgust rating by the democrat indicator and extract the difference
## in means and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_disgust_partisan <- lapply(hashtags, function(current_tag) {
  tag_rows <- filter(image_disgust_ttest, hashtag == current_tag)
  disgust_rating <- tag_rows$Disgust
  party <- tag_rows$democrat

  ttest_fit <- t.test(disgust_rating ~ party)

  tibble(hashtag = current_tag,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Republican minus Democrat per the axis label,
         ## assuming democrat is coded "0"/"1" -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(hashtag = fct_reorder(hashtag, diff))


#### plot it: one point (difference in means) and CI bar per hashtag,
#### with a dotted reference line at zero difference
ggplot(ttest_disgust_partisan) +
  geom_point(aes(x = hashtag, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = hashtag, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## hashtags on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference in Republican-Democrat \nRatings of Disgust") +
  xlab("Hashtag") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF
#ggsave(str_c(results_dir, 
#             "2C_hashtag_disgust_partisan_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "2C_hashtag_disgust_partisan_diffs.tiff"),
       width = 25, height = 20, units = "cm",
       device = "tiff", dpi = 300)

#######################
#### Figure 3A: Differences in Image Annotation by Gender
############################

### As with previous, loop over the different variables
### Here, showing emotions differences between male/female

### data for the gender t-tests over the emotion variables
## keep the image id, the female indicator and the block of emotion
## columns, then give the emotions readable names for the plot
img_emote_ttest_gender <- rename(
  select(slim_image_labels, image, female, Q3_emot_scared:Q3_emot_disgust),
  Scared = Q3_emot_scared,
  Anger = Q3_emot_angry,
  Fear = Q3_emot_afraid,
  Worry = Q3_emot_worried,
  Sadness = Q3_emot_sad,
  Hate = Q3_emot_hateful,
  Resentment = Q3_emot_resent,
  Bitterness = Q3_emot_bitter,
  Disgust = Q3_emot_disgust,
  Enthusiasm = Q3_emot_enthus,
  Hope = Q3_emot_hopeful,
  Pride = Q3_emot_proud
)

## the variable names to loop over are the same as above (emote_vars)
## for each emotion, run a t-test (t.test() default, i.e. Welch) of the
## rating by the female indicator and extract the difference in means
## and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_emote_output_gender <- lapply(emote_vars, function(emote_var) {
  rating <- img_emote_ttest_gender[[emote_var]]
  gender <- img_emote_ttest_gender$female

  ttest_fit <- t.test(rating ~ gender)

  tibble(varname = emote_var,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Male minus Female per the axis label,
         ## assuming female is coded 0/1 -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(varname = fct_reorder(varname, diff))


#### plot it: one point (difference in means) and CI bar per emotion,
#### with a dotted reference line at zero difference
ggplot(ttest_emote_output_gender) +
  geom_point(aes(x = varname, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = varname, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## emotions on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference between Male \n and Female Labels") +
  xlab("Image Variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF
#ggsave(str_c(results_dir, 
#             "3A_emotion_gender_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "3A_emotion_gender_diffs.tiff"),
       width = 20, height = 25.5, units = "cm",
       device = "tiff", dpi = 300)


#######################
#### Figure 3B: Differences in Image Annotation by Gender
############################

### As with previous, loop over the different variables
### Here, showing protest differences between male/female by hashtag

### columns needed for the protest t-tests by annotator gender
image_protest_ttest_gender <- select(slim_image_labels,
                                     hashtag, image, protest_dum, female)

## for each hashtag, run a t-test (t.test() default, i.e. Welch) of the
## protest indicator by the female indicator and extract the difference
## in means and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_protest_gender <- lapply(hashtags, function(current_tag) {
  tag_rows <- filter(image_protest_ttest_gender, hashtag == current_tag)
  saw_protest <- tag_rows$protest_dum
  gender <- tag_rows$female

  ttest_fit <- t.test(saw_protest ~ gender)

  tibble(hashtag = current_tag,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Male minus Female per the axis label,
         ## assuming female is coded 0/1 -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(hashtag = fct_reorder(hashtag, diff))

#### plot it: one point (difference in means) and CI bar per hashtag,
#### with a dotted reference line at zero difference
ggplot(ttest_protest_gender) +
  geom_point(aes(x = hashtag, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = hashtag, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## hashtags on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference in Male-Female Proportion\n of Images with Protest") +
  xlab("Hashtag") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF
#ggsave(str_c(results_dir, 
#             "3B_hashtag_protest_gender_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "3B_hashtag_protest_gender_diffs.tiff"),
       width = 25, height = 20, units = "cm",
       device = "tiff", dpi = 300)

#######################
#### Figure 3C: Differences in Image Annotation by Gender
############################

### As with previous, loop over the different variables
### Here, showing disgust differences between male/female by hashtag

### columns needed for the disgust t-tests by annotator gender;
### the rating is renamed to Disgust while selecting
image_disgust_ttest_gender <- select(slim_image_labels,
                                     hashtag, image,
                                     Disgust = Q3_emot_disgust,
                                     female)

## for each hashtag, run a t-test (t.test() default, i.e. Welch) of the
## disgust rating by the female indicator and extract the difference
## in means and the CI for that difference
## the rows are built in a list and bound once, rather than growing the
## tibble inside a loop, and no temporaries are left in the workspace
ttest_disgust_gender <- lapply(hashtags, function(current_tag) {
  tag_rows <- filter(image_disgust_ttest_gender, hashtag == current_tag)
  disgust_rating <- tag_rows$Disgust
  gender <- tag_rows$female

  ttest_fit <- t.test(disgust_rating ~ gender)

  tibble(hashtag = current_tag,
         ## first group mean minus second group mean, the same direction
         ## as the confidence interval reported by t.test()
         ## NOTE(review): Male minus Female per the axis label,
         ## assuming female is coded 0/1 -- confirm
         diff = ttest_fit$estimate[1] - ttest_fit$estimate[2],
         low_ci = ttest_fit$conf.int[1],
         high_ci = ttest_fit$conf.int[2])
}) %>%
  bind_rows() %>%
  ## relevel the factor by size of difference, ready for plotting
  mutate(hashtag = fct_reorder(hashtag, diff))


#### plot it: one point (difference in means) and CI bar per hashtag,
#### with a dotted reference line at zero difference
ggplot(ttest_disgust_gender) +
  geom_point(aes(x = hashtag, y = diff),
             size = 3) +
  ## line thickness is set with linewidth; size is deprecated for lines
  ## since ggplot2 3.4.0 (Figure 1(A) already uses linewidth)
  geom_errorbar(aes(x = hashtag, y = diff,
                    ymin = low_ci, ymax = high_ci),
                width = 0.2,
                linewidth = 0.5) +
  geom_hline(yintercept = 0,
             linetype = "dotted") +
  ## hashtags on the vertical axis
  coord_flip() +
  theme_classic() +
  ylab("Difference in Male-Female \nRatings of Disgust") +
  xlab("Hashtag") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        axis.title.y = element_text(margin = margin(t = 0, r = 10,
                                                    b = 0, l = 0)))

## if you want a PDF
#ggsave(str_c(results_dir, 
#             "3C_hashtag_disgust_gender_diffs.pdf"), width = 20, units = "cm")

## TIFF for publication (ggsave() writes the most recent plot)
ggsave(str_c(results_dir, "3C_hashtag_disgust_gender_diffs.tiff"),
       width = 25, height = 20, units = "cm",
       device = "tiff", dpi = 300)
